# Import libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly
import plotly.graph_objects as go
import plotly.express as px
import rfit
# Load the training CSV from its remote location into a DataFrame.
df = pd.read_csv(
    "https://ccadroit.s3.amazonaws.com/cloudComputing/train.csv"
)
print(df)

# Discard the two identifier columns before any analysis.
df = df.drop(columns=["User_ID", "Product_ID"])

# Preview both ends of the frame (only rendered when run as notebook cells).
df.head()
df.tail()
from sklearn.preprocessing import LabelEncoder

# Build the cleaned frame in one step:
#   * Gender and City_Category labels are replaced by integer codes
#     (a fresh LabelEncoder is fitted for each column);
#   * missing Product_Category_2 / Product_Category_3 values become 0 and
#     the columns are cast to int64.
# Age is deliberately not part of this step: its encoding call was disabled
# in the original code (`LabelEncoder().fit_transform(df['Age'])`).
df = df.assign(
    Gender=LabelEncoder().fit_transform(df["Gender"]),
    City_Category=LabelEncoder().fit_transform(df["City_Category"]),
    Product_Category_2=df["Product_Category_2"].fillna(0).astype("int64"),
    Product_Category_3=df["Product_Category_3"].fillna(0).astype("int64"),
)

# Preview the transformed frame (only rendered when run as notebook cells).
df.head()
df.tail()

# Percentage of missing values remaining in each column.
100 * df.isnull().sum() / df.shape[0]
# Box plot of the Purchase distribution for each (label-encoded) Gender value.
# Quartiles use the "exclusive" method; plotly also accepts "inclusive" and
# "linear" (its default).
fig = px.box(data_frame=df, x="Gender", y="Purchase").update_traces(
    quartilemethod="exclusive"
)
fig.show()
Finding: Men spend more during the Black Friday sale.
# Count of records in each Age category.
# The column is passed via the `x=` keyword: in seaborn >= 0.12 the first
# positional parameter of countplot is `data`, so a positionally passed Series
# is no longer interpreted as the variable whose categories should be counted.
sns.countplot(x="Age", data=df)
plt.title('Distribution of Age')
plt.xlabel('Different Categories of Age')
plt.show()
Finding: People aged 26-35 are more interested in Black Friday shopping than the other age groups.
# Mean Purchase for each Marital_Status value.
# "Purchase" is selected *before* aggregating so that only that column is
# averaged. Averaging the whole frame first is wasteful and, on pandas >= 2.0,
# raises TypeError when any remaining column is non-numeric (Age, for example,
# is left un-encoded earlier in this script).
df.groupby("Marital_Status")["Purchase"].mean()
df.groupby("Marital_Status")["Purchase"].mean().plot(kind='bar')
plt.title("Marital_Status and Purchase Analysis")
plt.show()
Finding: Unmarried customers are more active during the Black Friday sale.
# Count of records in each (label-encoded) City_Category.
# Passed via `x=` because seaborn >= 0.12 treats the first positional argument
# of countplot as `data`, not as the variable to count.
sns.countplot(x="City_Category", data=df)
plt.show()
Finding: City category 2 has made the largest number of purchases.
# Mean Purchase for each City_Category, drawn as a bar chart.
# "Purchase" is selected before aggregating so only that column is averaged;
# calling .mean() on the whole grouped frame raises TypeError on pandas >= 2.0
# when non-numeric columns (e.g. the un-encoded Age) are present.
df.groupby("City_Category")["Purchase"].mean().plot(kind='bar')
plt.title("City Category and Purchase Analysis")
plt.show()
However, the city category whose buyers spend the most is category 2.
# Count of records for each Stay_In_Current_City_Years value.
# Passed via `x=` because seaborn >= 0.12 treats the first positional argument
# of countplot as `data`, not as the variable to count.
sns.countplot(x="Stay_In_Current_City_Years", data=df)
plt.show()
Finding: It looks like the longer someone has lived in a city, the less prone they are to buy new things.